from dataclasses import replace
import pprint
import scrapy
from twisted.web.http import parseContentRange


class MovieSpiderSpider(scrapy.Spider):
    """Crawl movie list pages on kangtemu.com and scrape each detail page.

    ``parse`` walks the ``<li>`` cards of a list page and schedules one
    request per card; ``parse_movie_detail`` turns the detail page into a
    single dict keyed by the (Chinese) field labels used in the output.
    """

    name = "movie_spider"
    allowed_domains = ["kangtemu.com"]
    # range(1, 2) yields only page 1; widen the range to crawl more list pages.
    start_urls = ["https://www.kangtemu.com/dianying/index_____hits_{}.html".format(i) for i in range(1, 2)]
    # Site root. Links are now resolved against the response URL, but the
    # attribute is kept so existing external references keep working.
    host_url = "https://www.kangtemu.com"

    def parse(self, response):
        """Yield one detail-page request for every movie card on a list page.

        Args:
            response: The downloaded list page.

        Yields:
            scrapy.Request: A request for each card's link, handled by
            ``parse_movie_detail``.
        """
        for card in response.xpath('//*[@id="content"]/li'):
            link = card.xpath('./a/@href').get()
            if not link:
                # A card without an href would otherwise produce a request
                # for the bogus URL "<host>None".
                continue
            # urljoin copes with root-relative, page-relative and absolute
            # hrefs alike, unlike plain string concatenation.
            yield scrapy.Request(url=response.urljoin(link), callback=self.parse_movie_detail)

    def parse_movie_detail(self, response):
        """Extract the movie fields from a detail page.

        Every XPath is evaluated against the whole set of nodes matched by
        the container expression, so scalar fields hold the first match and
        the list fields hold all matches across those nodes.

        Args:
            response: The downloaded detail page.

        Returns:
            dict | None: The scraped item, or ``None`` when the info
            container is not found on the page.
        """
        card_info = response.xpath('.//div[1]/div/div[2]/div[1]/div[2]')
        if not card_info:
            # Positional XPaths break silently on layout changes; leave a
            # trace instead of dropping the page without notice.
            self.logger.warning("No movie info container found on %s", response.url)
            return None

        return {
            '片名': card_info.xpath('./div/h1/text()').get(),  # title
            '清晰度': card_info.xpath('./div/ul/li[2]/text()').get(),  # video quality
            '类型': card_info.xpath('./div/ul/li[3]/a/text()').get(),  # genre
            '主演': card_info.xpath('./div/ul/li[4]/a/text()').extract(),  # starring (list)
            '导演': card_info.xpath('./div/ul/li[6]/a/text()').extract(),  # director (list)
            '国家/地区': card_info.xpath('./div/ul/li[7]/text()').get(),  # country/region
            '时长': card_info.xpath('./div/ul/li[8]/text()').get(),  # duration
            '语言': card_info.xpath('./div/ul/li[9]/text()').get(),  # language
            '年代': card_info.xpath('./div/ul/li[10]/text()').get(),  # year
            '更新时间': card_info.xpath('./div/ul/li[11]/text()').get(),  # last updated
            '详细介绍': card_info.xpath('./div/ul/li[13]/span[3]/text()').get(),  # description
        }

